Author

Behram Radmanesh

Github Link

Problem 1 - OOP Programming

Part A

Code
## Create the gcd and lcm function using Rcpp
library(Rcpp)

## Include these functions
sourceCpp("gcd_lcm.cpp")

## Define the 'rational' S4 class: a fraction stored as two numeric slots,
## `numerator` and `denominator`.
## NOTE(review): `contains = "numeric"` additionally gives every object a
## numeric data part that none of the methods here read; it is kept so the
## class hierarchy is unchanged -- confirm whether it is intended.
setClass("rational",
         contains = "numeric",
         slots = c(
           numerator = "numeric",
           denominator = "numeric"
         ),
         ## Validity check, run whenever an object is built with slot values.
         ## Problems are signalled with stop(), so the error messages seen
         ## by callers are exactly the strings below.
         validity = function(object) {
           ## Each slot must hold exactly one non-missing number. Without
           ## this guard the `== 0` test below fails with an opaque `if`
           ## error for empty, NA or vector denominators, and an empty or
           ## vector numerator would be accepted silently.
           is_scalar_number <- function(v) length(v) == 1L && !is.na(v)
           if (!is_scalar_number(object@numerator) ||
               !is_scalar_number(object@denominator)) {
             stop("Numerator and denominator must each be a single non-missing number.")
           }
           ## check to ensure 0 isn't in denominator
           if (object@denominator == 0) {
             stop("Denominator cannot be zero.")
           }
           ## check to ensure complex numbers aren't used
           ## (defensive only: the "numeric" slot type already rejects
           ## complex values before this function is reached)
           if (is.complex(object@numerator) || is.complex(object@denominator)) {
             stop("Numerator and denominator cannot be complex numbers.")
           }
           TRUE
         })

## Display a rational as "<numerator> / <denominator>" followed by a newline
setMethod("show", signature(object = "rational"),
          function(object) {
            top <- object@numerator
            bottom <- object@denominator
            cat(top, "/", bottom, "\n")
          })

 

## Declare the 'simplify' generic; it dispatches on its single argument
## `object`
setGeneric(name = "simplify",
           def = function(object) standardGeneric("simplify"))
[1] "simplify"
Code
## 'simplify' method for rationals: divide numerator and denominator by
## their greatest common divisor and return a new 'rational' whose
## denominator is positive.
## gcd() is provided by gcd_lcm.cpp (not shown here).
setMethod("simplify", "rational",
          function(object) {
            ## Named `divisor` rather than `gcd` so the local value does not
            ## shadow the gcd() function it was computed with.
            divisor <- gcd(object@numerator, object@denominator)
            top <- object@numerator %/% divisor
            bottom <- object@denominator %/% divisor
            ## Canonical sign: keep any minus sign on the numerator, so that
            ## e.g. 1 / -2 is reported as -1 / 2 regardless of the sign
            ## convention gcd() uses for negative inputs.
            if (bottom < 0) {
              top <- -top
              bottom <- -bottom
            }
            new("rational",
                numerator = top,
                denominator = bottom)
          })

## Declare the 'quotient' generic (the arithmetic methods for 'rational'
## follow further down); `digits` defaults to 7
setGeneric(name = "quotient",
           def = function(object, digits = 7) standardGeneric("quotient"))
[1] "quotient"
Code
## 'quotient' method for rationals: print numerator / denominator rounded to
## `digits` decimal places and return the unrounded value invisibly.
##   object  a 'rational'
##   digits  passed to round(); the default of 7 comes from the generic
setMethod("quotient", "rational",
          function(object, digits) {
            result <- object@numerator / object@denominator
            ## Rounding only affects what is printed
            cat(round(result, digits), "\n")
            ## cat() returns NULL, so without an explicit return value
            ## `q <- quotient(r)` would store NULL. invisible() keeps the
            ## console output to the single printed line.
            invisible(result)
          })

## Addition of two rationals: rescale both numerators to the least common
## multiple of the denominators, add them, then reduce the result.
setMethod("+", signature(e1 = "rational", e2 = "rational"),
          function(e1, e2) {
            shared <- lcm(e1@denominator, e2@denominator)
            lhs <- e1@numerator * (shared / e1@denominator)
            rhs <- e2@numerator * (shared / e2@denominator)
            simplify(new("rational",
                         numerator = lhs + rhs,
                         denominator = shared))
          })

## Subtraction of two rationals: rescale both numerators to the least common
## multiple of the denominators, subtract, then reduce the result.
setMethod("-", signature(e1 = "rational", e2 = "rational"),
          function(e1, e2) {
            shared <- lcm(e1@denominator, e2@denominator)
            lhs <- e1@numerator * (shared / e1@denominator)
            rhs <- e2@numerator * (shared / e2@denominator)
            simplify(new("rational",
                         numerator = lhs - rhs,
                         denominator = shared))
          })

## Multiplication of two rationals: multiply numerators and denominators
## pairwise, then reduce the result.
setMethod("*", signature(e1 = "rational", e2 = "rational"),
          function(e1, e2) {
            product <- new("rational",
                           numerator = e1@numerator * e2@numerator,
                           denominator = e1@denominator * e2@denominator)
            simplify(product)
          })

## Division of two rationals: cross-multiply (e1 times the reciprocal of e2),
## then reduce the result. If e2 has a zero numerator the new denominator is
## zero and constructing the result fails the class validity check.
setMethod("/", signature(e1 = "rational", e2 = "rational"),
          function(e1, e2) {
            ratio <- new("rational",
                         numerator = e1@numerator * e2@denominator,
                         denominator = e1@denominator * e2@numerator)
            simplify(ratio)
          })

Part B

Code
# Examples
# Three test fractions used in the calls below: r1 (24/6) is not in lowest
# terms, r2 (7/230) is already in lowest terms, and r3 (0/4) has a zero
# numerator.
r1 <- new("rational", numerator = 24, denominator = 6)
r2 <- new("rational", numerator = 7, denominator = 230)
r3 <- new("rational", numerator = 0, denominator = 4)

r1
24 / 6 
Code
r3
0 / 4 
Code
r1 + r2
927 / 230 
Code
r1 - r2
913 / 230 
Code
r1 * r2
14 / 115 
Code
r1 / r2
920 / 7 
Code
r1 + r3
4 / 1 
Code
r1 * r3
0 / 1 
Code
r2 / r3
Error in validityMethod(object): Denominator cannot be zero.
Code
quotient(r1)
4 
Code
quotient(r2)
0.0304348 
Code
quotient(r2, digits = 3)
0.03 
Code
quotient(r2, digits = 3.14)
0.03 
Code
quotient(r2, digits = "avocado")
Error in round(result, digits): non-numeric argument to mathematical function
Code
q2 <- quotient(r2, digits = 3)
0.03 
Code
q2
NULL
Code
quotient(r3)
0 
Code
simplify(r1)
4 / 1 
Code
simplify(r2)
7 / 230 
Code
simplify(r3)
0 / 1 

Part C

Code
## Check for Zero in denominator
r4 <- new("rational", numerator = 24, denominator = 0)
Error in validityMethod(object): Denominator cannot be zero.
Code
## Check for Complex number
r5 <- new("rational", numerator = 2i, denominator = 1)
Error in validObject(.Object): invalid class "rational" object: invalid object for slot "numerator" in class "rational": got class "complex", should be or extend class "numeric"

Problem 2 - plotly

Part A: Any Change in the Distribution of Genre Of Sales?

Note: Professor Errickson’s plot was used for this analysis as allowed by the problem instructions.

As such the ‘gg’ object will be passed into the ‘ggplotly’ function.

Code
library(plotly)
Warning: package 'plotly' was built under R version 4.4.2
Loading required package: ggplot2

Attaching package: 'plotly'
The following object is masked from 'package:ggplot2':

    last_plot
The following object is masked from 'package:stats':

    filter
The following object is masked from 'package:graphics':

    layout
Code
library(ggplot2)

## Professor Errickson's Plot was used
## Load the art sales data; the path is relative to the working directory.
art <- read.csv("data/art.csv")

## Cleanup Data
## Wherever the Painting indicator is 1, force the Others indicator to 0 so
## a row flagged as a painting is not also flagged as "Others".
art$Genre___Others[art$Genre___Painting == 1] <- 0
## Print the distinct combinations of the Genre* indicator columns to check
## the result.
unique(art[, grep("^Genre", names(art))])
     Genre___Photography Genre___Print Genre___Sculpture Genre___Painting
1                      0             0                 0                1
2                      0             0                 1                0
5                      1             0                 0                0
123                    0             1                 0                0
1444                   0             0                 0                0
     Genre___Others
1                 0
2                 0
5                 0
123               0
1444              1
Code
## Collapse the Genre* indicator columns into a single 'genre' label
## (this adds a column; no table headers are renamed).
## Every row starts as "Photography" and is then overwritten wherever another
## indicator is 1. The assignments run in order, so if several indicators
## were 1 on the same row the last matching assignment would win.
art$genre <- "Photography"
art$genre[art$Genre___Print == 1] <- "Print"
art$genre[art$Genre___Sculpture == 1] <- "Sculpture"
art$genre[art$Genre___Painting == 1] <- "Painting"
art$genre[art$Genre___Others == 1] <- "Other"

## Cross-tabulate the number of sales by year and genre; the outer
## parentheses make the assignment also print the table.
(yeargenre <- with(art, table(year, genre)))
      genre
year   Other Painting Photography Print Sculpture
  1997     0        8           3     0         5
  1998     0        5           3     0         4
  1999     0        8          17     0         5
  2000     0       19          34     2        53
  2001     0       18          50     7        37
  2002     0       11          50     6        29
  2003     0       12          73    13        70
  2004     0       23          86     7        72
  2005     0       32         122    26       122
  2006     0       57         165    43       129
  2007     5       47         158    43       146
  2008     4       31         166    54       153
  2009     3       41         165    55       149
  2010     5       42         184    37       143
  2011     6       95         247    80       289
  2012     4       70         223    41       235
Code
## Row-wise proportions: each year's genre counts divided by that year's total
ygperc <- prop.table(yeargenre, margin = 1)
## Put the genre columns in the order wanted for the plot
ygperc <- ygperc[, c("Painting", "Sculpture", "Photography", "Print", "Other")]

## Long format (year, genre, Freq) for ggplot, with the genre levels reversed
ygpercm <- as.data.frame(ygperc)
ygpercm$genre <- factor(ygpercm$genre,
                        levels = ygpercm$genre |> unique() |> rev())

## Create the ggplot object: one horizontal stacked bar per year showing the
## share of sales in each genre
g <- ggplot(ygpercm, aes(y = Freq, x = year, fill = genre)) +
  geom_bar(stat = "identity") +
  coord_flip() +
  labs(y = NULL, x = NULL, title = "Proportion of Genre of Art Sales") +
  ## "none" is the documented value for hiding the legend; "off" is not one
  ## of the recognised legend positions
  theme(legend.position = "none") +
  ## Label the segments of the 2012 bar with their genre; "Other" is skipped
  ## here and labelled separately below
  geom_text(data = ygpercm[ygpercm$year == 2012 & ygpercm$genre != "Other", ],
            aes(label = genre),
            position = position_stack(vjust = 0.5),
            color = "white",
            size = 4) +
  # Add the Other label
  ## annotate() draws the arrow once; a geom_segment() with constant aes()
  ## values would inherit the plot data and draw one copy per data row
  annotate("segment", x = 15, y = 1.02, xend = 16, yend = 1,
           arrow = arrow(length = unit(0.15, "inches")),
           linewidth = .5, color = "black") +
  annotate("text", x = 14.9, y = 1.02, label = "Other", hjust = 0, angle = 270)

## Pass the ggplot object into ggplotly
ggplotly(g)

Part B: Genre’s Effect On Sales Price

Code
library(dplyr)

Attaching package: 'dplyr'
The following objects are masked from 'package:stats':

    filter, lag
The following objects are masked from 'package:base':

    intersect, setdiff, setequal, union
Code
## Median sale price for every year/genre combination
artmedian <- setNames(
  aggregate(art$price_usd, by = list(art$year, art$genre),
            FUN = median, na.rm = TRUE),
  c("year", "genre", "price_usd")
)

## 97.5th percentile of sale price for every year/genre combination
art975 <- setNames(
  aggregate(art$price_usd, by = list(art$year, art$genre),
            FUN = quantile, probs = .975, na.rm = TRUE),
  c("year", "genre", "price_usd")
)

## Turn genre into a factor (levels in reverse order of appearance) for ggplot
artmedian$genre <- factor(artmedian$genre,
                          levels = artmedian$genre |> unique() |> rev())
art975$genre <- factor(art975$genre,
                       levels = art975$genre |> unique() |> rev())

## Stack both summaries, tagging each row with the statistic it holds
artcombine <- bind_rows(mutate(artmedian, measure = "Median"),
                        mutate(art975, measure = "97.5%"))

## Line chart of price by year: colour identifies the genre, line type
## identifies the statistic (median vs. 97.5th percentile)
g2 <- ggplot(artcombine,
             aes(x = year, y = price_usd, color = genre, linetype = measure)) +
  geom_line() +
  labs(title = "Changes in Price by Genre") +
  scale_x_continuous(
    name = NULL,
    breaks = seq(1997, 2012, by = 2),
    limits = c(1997, 2012)
  ) +
  ## Tick every 50,000 USD, labelled in thousands ("0k", "50k", ...)
  scale_y_continuous(
    name = "Price in Thousands USD",
    breaks = seq(0, 350000, by = 50000),
    labels = paste0(seq(0, 350, by = 50), "k")
  ) +
  scale_color_manual(values = 1:5) +
  scale_linetype_manual(values = c("97.5%" = "dotted", "Median" = "solid")) +
  ## Legend inside the plotting area, without background or title
  theme(
    legend.position = "inside",
    legend.position.inside = c(.1, .75),
    legend.background = element_blank(),
    legend.title = element_blank()
  )

## Convert the ggplot object to an interactive plotly figure
ggplotly(g2)

Sales prices change over time with respect to the outliers (the 97.5th percentile), but not with respect to the median. If the mean had been used instead, the outliers would have affected the mean price because they are so dramatic.

Problem 3 - data.table

Part A

Code
library(data.table)

Attaching package: 'data.table'
The following objects are masked from 'package:dplyr':

    between, first, last
Code
library(nycflights13)

## Read in 'flights' data w/ data.table function
flights <- data.table(flights)

## Obtain departures: attach the airport details to every flight through its
## origin code. Joining with by.x / by.y (rather than first creating a helper
## column with `:=`) leaves `flights` itself unmodified, since `:=` would add
## the column by reference.
mergeDep <- merge(flights,
                  airports,
                  by.x = "origin",
                  by.y = "faa",
                  all.x = TRUE)

## Group by airport name and compute mean / median departure delay,
## ignoring missing delays
depart <- mergeDep[, .(meanDelay = mean(dep_delay, na.rm = TRUE),
                       medianDelay = median(dep_delay, na.rm = TRUE)),
                   by = name]

depart
                  name meanDelay medianDelay
                <char>     <num>       <num>
1: Newark Liberty Intl  15.10795          -1
2: John F Kennedy Intl  12.11216          -1
3:          La Guardia  10.34688          -3
Code
## Obtain arrivals: attach the airport details to every flight through its
## destination code. Joining with by.x / by.y (rather than first creating a
## helper column with `:=`) leaves `flights` itself unmodified, since `:=`
## would add or overwrite the column by reference. all.x = TRUE keeps flights
## whose destination is missing from `airports`; they end up with an NA name.
mergeArr <- merge(flights,
                  airports,
                  by.x = "dest",
                  by.y = "faa",
                  all.x = TRUE)

## Group by airport name and compute mean / median arrival delay,
## ignoring missing delays
arrival <- mergeArr[, .(meanDelay = mean(arr_delay, na.rm = TRUE),
                        medianDelay = median(arr_delay, na.rm = TRUE)),
                    by = name]

## Print every airport, largest mean delay first
arrival |> _[order(meanDelay, decreasing = TRUE)] |>
  print(nrows = 102)
                                     name    meanDelay medianDelay
                                   <char>        <num>       <num>
  1:                Columbia Metropolitan  41.76415094        28.0
  2:                           Tulsa Intl  33.65986395        14.0
  3:                    Will Rogers World  30.61904762        16.0
  4:                 Jackson Hole Airport  28.09523810        15.0
  5:                        Mc Ghee Tyson  24.06920415         2.0
  6:               Dane Co Rgnl Truax Fld  20.19604317         1.0
  7:                        Richmond Intl  20.11125320         1.0
  8:        Akron Canton Regional Airport  19.69833729         3.0
  9:                      Des Moines Intl  19.00573614         0.0
 10:                   Gerald R Ford Intl  18.18956044         1.0
 11:                      Birmingham Intl  16.87732342        -2.0
 12:         Theodore Francis Green State  16.23463687         1.0
 13: Greenville-Spartanburg International  15.93544304        -0.5
 14:    Cincinnati Northern Kentucky Intl  15.36456376        -3.0
 15:            Savannah Hilton Head Intl  15.12950601        -1.0
 16:          Manchester Regional Airport  14.78755365        -3.0
 17:                          Eppley Afld  14.69889841        -2.0
 18:                               Yeager  14.67164179        -1.5
 19:                     Kansas City Intl  14.51405836         0.0
 20:                          Albany Intl  14.39712919        -4.0
 21:                General Mitchell Intl  14.16722038         0.0
 22:                       Piedmont Triad  14.11260054        -2.0
 23:               Washington Dulles Intl  13.86420212        -3.0
 24:               Cherry Capital Airport  12.96842105       -10.0
 25:              James M Cox Dayton Intl  12.68048606        -3.0
 26:     Louisville International Airport  12.66938406        -2.0
 27:                  Chicago Midway Intl  12.36422360        -1.0
 28:                      Sacramento Intl  12.10992908         4.0
 29:                    Jacksonville Intl  11.84483416        -2.0
 30:                       Nashville Intl  11.81245891        -2.0
 31:                Portland Intl Jetport  11.66040210        -4.0
 32:               Greater Rochester Intl  11.56064461        -5.0
 33:      Hartsfield Jackson Atlanta Intl  11.30011285        -1.0
 34:                Lambert St Louis Intl  11.07846451        -3.0
 35:                         Norfolk Intl  10.94909344        -4.0
 36:            Baltimore Washington Intl  10.72673385        -5.0
 37:                         Memphis Intl  10.64531435        -2.5
 38:                   Port Columbus Intl  10.60132291        -3.0
 39:                  Charleston Afb Intl  10.59296847        -4.0
 40:                    Philadelphia Intl  10.12719014        -3.0
 41:                  Raleigh Durham Intl  10.05238095        -3.0
 42:                    Indianapolis Intl   9.94043412        -3.0
 43:            Charlottesville-Albemarle   9.50000000        -5.0
 44:               Cleveland Hopkins Intl   9.18161129        -5.0
 45:        Ronald Reagan Washington Natl   9.06695204        -2.0
 46:                      Burlington Intl   8.95099602        -4.0
 47:                 Buffalo Niagara Intl   8.94595186        -5.0
 48:                Syracuse Hancock Intl   8.90392501        -5.0
 49:                          Denver Intl   8.60650021        -2.0
 50:                      Palm Beach Intl   8.56297210        -3.0
 51:                             Bob Hope   8.17567568        -3.0
 52:       Fort Lauderdale Hollywood Intl   8.08212154        -3.0
 53:                          Bangor Intl   8.02793296        -9.0
 54:           Asheville Regional Airport   8.00383142        -1.0
 55:                      Pittsburgh Intl   7.68099053        -5.0
 56:                       Gallatin Field   7.60000000        -2.0
 57:                 NW Arkansas Regional   7.46572581        -2.0
 58:                           Tampa Intl   7.40852503        -4.0
 59:               Charlotte Douglas Intl   7.36031885        -3.0
 60:             Minneapolis St Paul Intl   7.27016886        -5.0
 61:                      William P Hobby   7.17618819        -4.0
 62:                         Bradley Intl   7.04854369       -10.0
 63:                     San Antonio Intl   6.94537178        -9.0
 64:                      South Bend Rgnl   6.50000000        -3.5
 65:     Louis Armstrong New Orleans Intl   6.49017497        -6.0
 66:                        Key West Intl   6.35294118         7.0
 67:                        Eagle Co Rgnl   6.30434783        -4.0
 68:                Austin Bergstrom Intl   6.01990875        -5.0
 69:                   Chicago Ohare Intl   5.87661475        -8.0
 70:                         Orlando Intl   5.45464309        -5.0
 71:               Detroit Metro Wayne Co   5.42996346        -7.0
 72:                        Portland Intl   5.14157973        -5.0
 73:                        Nantucket Mem   4.85227273        -3.0
 74:                      Wilmington Intl   4.63551402        -7.0
 75:                    Myrtle Beach Intl   4.60344828       -13.0
 76:    Albuquerque International Sunport   4.38188976        -5.5
 77:         George Bush Intercontinental   4.24079040        -5.0
 78:        Norman Y Mineta San Jose Intl   3.44817073        -7.0
 79:               Southwest Florida Intl   3.23814963        -5.0
 80:                       San Diego Intl   3.13916574        -5.0
 81:              Sarasota Bradenton Intl   3.08243131        -5.0
 82:            Metropolitan Oakland Intl   3.07766990        -9.0
 83:                                 <NA>   3.01233913        -5.0
 84:   General Edward Lawrence Logan Intl   2.91439222        -9.0
 85:                   San Francisco Intl   2.67289152        -8.0
 86:                         Yampa Valley   2.14285714         2.0
 87:              Phoenix Sky Harbor Intl   2.09704733        -6.0
 88:            Montrose Regional Airport   1.78571429       -10.5
 89:                     Los Angeles Intl   0.54711094        -7.0
 90:               Dallas Fort Worth Intl   0.32212685        -9.0
 91:                           Miami Intl   0.29905978        -9.0
 92:                       Mc Carran Intl   0.25772849        -8.0
 93:                  Salt Lake City Intl   0.17625459        -8.0
 94:                           Long Beach  -0.06202723       -10.0
 95:                Martha\\\\'s Vineyard  -0.28571429       -11.0
 96:                  Seattle Tacoma Intl  -1.09909910       -11.0
 97:                        Honolulu Intl  -1.36519258        -7.0
 98:           Ted Stevens Anchorage Intl  -2.50000000         1.5
 99:            John Wayne Arpt Orange Co  -7.86822660       -11.0
100:                    Palm Springs Intl -12.72222222       -13.5
101:                           Blue Grass -22.00000000       -22.0
102:                           La Guardia          NaN          NA
                                     name    meanDelay medianDelay

Part B

Code
## Convert 'planes' to a data.table
planes <- data.table(planes)

## Attach aircraft details to every flight via the tail number, keeping
## flights that have no match in 'planes'
mergeFP <- merge(flights, planes, by = "tailnum", all.x = TRUE)

## Per aircraft model: number of flights and average speed in miles per hour
## (air_time is divided by 60 before use)
calculated <- mergeFP[
  ,
  .(numFlights = .N,
    avgmph = mean(distance / (air_time / 60), na.rm = TRUE)),
  by = model
]

## Keep only the model with the highest average speed
calculated[which.max(avgmph), .(model, avgmph, numFlights)]
     model   avgmph numFlights
    <char>    <num>      <int>
1: 777-222 482.6254          4